In [1]:
!pip install pandas plotly dash
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook"   # or "iframe", "notebook_connected"
from dash import Dash, dcc, html, Input, Output
Requirement already satisfied: pandas in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (2.3.2)
Requirement already satisfied: plotly in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (6.3.0)
Requirement already satisfied: dash in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (3.2.0)
Requirement already satisfied: numpy>=1.26.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2.3.2)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from pandas) (2025.2)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from plotly) (2.3.0)
Requirement already satisfied: packaging in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from plotly) (25.0)
Requirement already satisfied: Flask<3.2,>=1.0.4 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (3.1.2)
Requirement already satisfied: Werkzeug<3.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (3.1.3)
Requirement already satisfied: importlib-metadata in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (8.7.0)
Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (4.15.0)
Requirement already satisfied: requests in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (2.32.5)
Requirement already satisfied: retrying in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (1.4.2)
Requirement already satisfied: nest-asyncio in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (1.6.0)
Requirement already satisfied: setuptools in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from dash) (80.9.0)
Requirement already satisfied: blinker>=1.9.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0)
Requirement already satisfied: click>=8.1.3 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (8.2.1)
Requirement already satisfied: itsdangerous>=2.2.0 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0)
Requirement already satisfied: jinja2>=3.1.2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6)
Requirement already satisfied: markupsafe>=2.1.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.0.2)
Requirement already satisfied: colorama in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from click>=8.1.3->Flask<3.2,>=1.0.4->dash) (0.4.6)
Requirement already satisfied: six>=1.5 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Requirement already satisfied: zipp>=3.20 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from importlib-metadata->dash) (3.23.0)
Requirement already satisfied: charset_normalizer<4,>=2 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (3.4.3)
Requirement already satisfied: idna<4,>=2.5 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (2.5.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\vemul\appdata\local\programs\python\python313\lib\site-packages (from requests->dash) (2025.8.3)
In [2]:
# Read the Supermart sales CSV (expected next to the notebook) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="Supermart Grocery Sales - Retail Analytics Dataset.csv")
# Report the (rows, columns) shape, then preview the first five records.
print("Initial Shape:", df.shape)
df.head(5)
Initial Shape: (9994, 11)
Out[2]:
Order ID Customer Name Category Sub Category City Order Date Region Sales Discount Profit State
0 OD1 Harish Oil & Masala Masalas Vellore 11-08-2017 North 1254 0.12 401.28 Tamil Nadu
1 OD2 Sudha Beverages Health Drinks Krishnagiri 11-08-2017 South 749 0.18 149.80 Tamil Nadu
2 OD3 Hussain Food Grains Atta & Flour Perambalur 06-12-2017 West 2360 0.21 165.20 Tamil Nadu
3 OD4 Jackson Fruits & Veggies Fresh Vegetables Dharmapuri 10-11-2016 South 896 0.25 89.60 Tamil Nadu
4 OD5 Ridhesh Food Grains Organic Staples Ooty 10-11-2016 South 2355 0.26 918.45 Tamil Nadu
In [3]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
In [4]:
# Load the dataset from the CSV file (the same file that was read earlier in the notebook).
df = pd.read_csv(filepath_or_buffer="Supermart Grocery Sales - Retail Analytics Dataset.csv")
In [5]:
# Convert order date
# format="mixed" parses every value on its own. Without it, pandas >= 2.0 infers one format
# from the first value and, with errors="coerce", silently turns every row written in a
# different style into NaT.
# NOTE(review): ambiguous day/month strings are read month-first (pandas' default) — confirm
# this matches how the source system wrote the dates.
order_dates = pd.to_datetime(df['Order Date'], format="mixed", errors='coerce')

# Surface, rather than hide, any values that still could not be parsed.
unparsed_count = int(order_dates.isna().sum() - df['Order Date'].isna().sum())
if unparsed_count:
    print(f"Warning: {unparsed_count} 'Order Date' values could not be parsed and were set to NaT")

df['Order Date'] = order_dates
df['Year'] = df['Order Date'].dt.year
df['Month'] = df['Order Date'].dt.strftime('%b')  # abbreviated month name, e.g. "Nov"
In [6]:
# Supermart Grocery Color Palette
# Seven distinct colours: the "Top 7 Cities" chart colours seven series, and with only six
# entries the seventh series would wrap around and reuse the first colour.
# The first six entries are unchanged, so charts with up to six series look the same as before.
supermart_colors = [
    "#2E8B57",  # sea green
    "#FFA500",  # orange
    "#FFD700",  # gold
    "#8B4513",  # saddle brown
    "#FF6347",  # tomato
    "#6B8E23",  # olive drab
    "#4682B4",  # steel blue (added)
]
In [7]:
# Plot 1: total sales per product category, drawn as one coloured bar per category
sales_category = df.groupby("Category", as_index=False)["Sales"].sum()
fig1 = px.bar(
    data_frame=sales_category,
    x="Category",
    y="Sales",
    color="Category",
    color_discrete_sequence=supermart_colors,
    hover_data={"Sales": ":,.0f"},  # thousands separator, no decimals, in the hover box
    title="Total Sales by Category",
)
# Kept as the cell's final expression, as in the original, so its return value is what the cell displays.
fig1.update_layout(template="plotly_white")
In [8]:
# --- Plot 2: Monthly Sales Trend ---

import plotly.express as px

# Coerce the two columns this plot relies on; values that cannot be converted become NaT / NaN
df["Order Date"] = pd.to_datetime(df["Order Date"], errors="coerce")
df["Sales"] = pd.to_numeric(df["Sales"], errors="coerce")

# Month number of every order, plus the display labels (position = month number - 1)
df["MonthNo"] = df["Order Date"].dt.month
month_labels = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

# Sum sales per month number (all years pooled together), skipping rows that lack
# either a month or a sales value, then order the result by month number
monthly_sales = df.dropna(subset=["MonthNo", "Sales"]).groupby("MonthNo", as_index=False)["Sales"].sum()
monthly_sales = monthly_sales.sort_values("MonthNo")
monthly_sales["Month"] = monthly_sales["MonthNo"].map(lambda month_no: month_labels[int(month_no) - 1])

# Line colour: first Supermart palette entry when the palette exists, otherwise the same green hard-coded
if "supermart_colors" in globals():
    palette = [supermart_colors[0]]
else:
    palette = ["#2E8B57"]

# Build and show the line chart with a marker on every month
fig2 = px.line(
    data_frame=monthly_sales,
    x="Month",
    y="Sales",
    color_discrete_sequence=palette,
    markers=True,
    title="Monthly Sales Trend",
)
fig2.update_traces(hovertemplate="Month: %{x}<br>Sales: %{y:,}")
fig2.show()
In [9]:
# Plot 3: share of total sales contributed by each category (pie chart)
sales_category = df.groupby("Category", as_index=False)["Sales"].sum()
fig3 = px.pie(
    data_frame=sales_category,
    names="Category",
    values="Sales",
    color="Category",
    color_discrete_sequence=supermart_colors,
    hover_data=["Sales"],  # plain list of column names
    title="Sales Distribution by Category",
)
fig3.update_traces(
    textinfo="percent+label",
    # one pull value per slice, so every slice is offset from the centre by the same amount
    pull=[0.05 for _ in range(len(sales_category))],
    # custom hover text: bold label, sales with thousands separators, and the slice percentage
    hovertemplate="<b>%{label}</b><br>Sales: %{value:,.0f}<br>Percentage: %{percent}",
)
fig3.update_layout(template="plotly_white")
fig3.show()
In [10]:
# Plot 4: the seven cities with the highest total sales
top_cities = (
    df.groupby("City")["Sales"]
      .sum()
      .nlargest(n=7)
      .reset_index()
)
fig4 = px.bar(
    data_frame=top_cities,
    x="City",
    y="Sales",
    color="City",
    color_discrete_sequence=supermart_colors,
    hover_data={"Sales": ":,.0f"},  # thousands separator, no decimals, in the hover box
    title="Top 7 Cities by Sales",
)
# Tilt the city names by 45 degrees; kept as the cell's final expression, as in the original.
fig4.update_layout(xaxis_tickangle=45, template="plotly_white")
In [11]:
# Plot A: Profit vs Discount Scatter (one bubble per order, bubble size = Sales)
# Sub Category and City are passed as custom_data so the custom hover template below can
# reference them as customdata[0] / customdata[1]. Previously they were requested through
# hover_data, but the overriding hovertemplate never mentioned them, so they were not shown.
figA = px.scatter(
    df, x="Discount", y="Profit",
    color="Category",
    size="Sales",
    custom_data=["Sub Category", "City"],
    color_discrete_sequence=supermart_colors,
    title="Profit vs Discount by Category (Bubble Size = Sales)"
)
figA.update_traces(
    hovertemplate=(
        "Sub Category: %{customdata[0]}<br>"
        "City: %{customdata[1]}<br>"
        "Discount: %{x}<br>Profit: %{y}<br>Sales: %{marker.size:,}"
    )
)
figA.show()
In [12]:
# Same Profit-vs-Discount bubbles as above, split into one facet column per Category
figA_faceted = px.scatter(
    data_frame=df,
    x="Discount",
    y="Profit",
    color="Category",
    size="Sales",
    facet_col="Category",  # one panel per category
    opacity=0.6,           # semi-transparent markers
    hover_data=["Sub Category", "City"],
    color_discrete_sequence=supermart_colors,
    title="Profit vs Discount by Category (Faceted View)",
)
figA_faceted.show()
In [13]:
# Plot B: Sales vs Profit density heatmap without extreme values.
# Keep only rows whose Sales AND Profit both lie between the 1st and 99th percentile
# (i.e. the middle 98% of each variable).
sales_min, sales_max = df["Sales"].quantile([0.01, 0.99])
profit_min, profit_max = df["Profit"].quantile([0.01, 0.99])

within_bounds = (
    df["Sales"].between(sales_min, sales_max)
    & df["Profit"].between(profit_min, profit_max)
)

# Filtering BEFORE binning actually removes the outliers (as the title states) and lets the
# 40x40 bins cover the retained range; zooming the axes afterwards would leave the bins
# computed over the full, outlier-stretched range.
figB = px.density_heatmap(
    df[within_bounds], x="Sales", y="Profit",
    nbinsx=40, nbinsy=40,
    color_continuous_scale="YlGnBu",
    title="Sales vs Profit Density Heatmap (Outliers Removed)"
)

figB.update_traces(
    hovertemplate="Sales: %{x}<br>Profit: %{y}<br>Count: %{z}"
)

# Plotly Express colours this trace through the shared layout colour axis, so the colour-bar
# title has to be set there; a trace-level colorbar title is not used in that case.
figB.update_layout(coloraxis_colorbar_title_text="Density")

figB.show()
In [14]:
# Plot C: sunburst of sales, drilling down Category → Sub Category → City
figC = px.sunburst(
    data_frame=df,
    path=["Category", "Sub Category", "City"],  # hierarchy levels, innermost ring first
    values="Sales",                             # sector size
    color="Sales",                              # sector shade on the green scale
    color_continuous_scale=px.colors.sequential.Greens,
    title="Sales Breakdown (Category → Sub Category → City)",
)
figC.show()
In [15]:
#  Plot D: Animated Sales Over Time
# Label every order with its year-month ("YYYY-MM"); one animation frame is drawn per label.
df["YearMonth"] = df["Order Date"].dt.to_period("M").astype(str)

# Rows whose Order Date is NaT get the literal label "NaT" above; exclude them here so they
# do not show up as a bogus extra animation frame.
time_sales = (
    df[df["Order Date"].notna()]
      .groupby(["YearMonth", "Category"], as_index=False)["Sales"]
      .sum()
)

figD = px.bar(
    time_sales, x="Category", y="Sales",
    color="Category",
    animation_frame="YearMonth",
    # Same bar order in every frame, even when a category has no sales in some month
    category_orders={"Category": sorted(time_sales["Category"].unique())},
    # Axis ranges are not recomputed per frame, so fix the y-axis to fit the tallest bar
    # of the whole animation (with 5% headroom) instead of only the first frame.
    range_y=[0, time_sales["Sales"].max() * 1.05],
    color_discrete_sequence=supermart_colors,
    title="Animated Sales by Category Over Time"
)
figD.show()
In [ ]: